Контрольная работа 2

Подгружаем все

In [1]:
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import matplotlib.lines as mlines

import seaborn as sns
sns.set(font_scale=1.3, style='whitegrid', palette='Set2')
from IPython.display import display

import numpy as np
import numpy.linalg as la
import pandas as pd
import scipy.stats as sps
from scipy.spatial import KDTree

# load and show an image with Pillow
from PIL import Image

import copy
import random
import os

from time import time, sleep
from datetime import datetime

import math
from tqdm.notebook import tqdm

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=UserWarning)

import cvxpy as cvx
print(cvx.installed_solvers())
['CVXOPT', 'ECOS', 'ECOS_BB', 'GLPK', 'GLPK_MI', 'OSQP', 'SCS']
In [2]:
from google.colab import drive 
drive.mount('/content/gdrive/')
Drive already mounted at /content/gdrive/; to attempt to forcibly remount, call drive.mount("/content/gdrive/", force_remount=True).
In [3]:
%ls
%cd 'gdrive/MyDrive/mipt-opt/kr2'
gdrive/  sample_data/
/content/gdrive/MyDrive/mipt-opt/kr2
In [49]:
%%shell
jupyter nbconvert --to html mipt-opt-kr2.ipynb
[NbConvertApp] Converting notebook mipt-opt-kr2.ipynb to html
[NbConvertApp] Writing 11184285 bytes to mipt-opt-kr2.html
Out[49]:

In [5]:
import builtins

class Mocking(object):
    def __init__(self, new_print):
        self.new_print = new_print
        self.real_print = builtins.print
    
    def __enter__(self):
        builtins.print = self.new_print
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        builtins.print = self.real_print

class MockPrint(object):
    def __init__(self, real_print=builtins.print):
        self.real_print = real_print
        self.prev_time = None
    
    def __call__(self, *args, **kwargs):
        arg = datetime.now().time() if self.prev_time is None else datetime.now() - self.prev_time
        self.real_print(f'[{str(arg):>15}]', *args, **kwargs)

    def tik(self):
        self.prev_time = datetime.now()

with Mocking(MockPrint()):
    print('Hello')
    print.tik()
    print("Hello")
    sleep(0.5)
    print("Hello")

print("Hello")
[09:51:28.438030] Hello
[ 0:00:00.000007] Hello
[ 0:00:00.500696] Hello
Hello
In [6]:
def MockingPrint(mock, tik=True):
    def decorator(function):
        def wrapper(*args, **kwargs):
            with Mocking(mock):
                if tik:
                    print.tik()
                return function(*args, **kwargs)
        return wrapper
    return decorator

Подгрузка и визуализация изображений

In [7]:
%ls demo
CAMNS_LP.m  cao1.jpg  demo_CAMNS_LP.m  is_ext_pt.m  ksiwek1.jpg  zhang1.jpg
In [8]:
cao = Image.open('demo/cao1.jpg')
ksiwek = Image.open('demo/ksiwek1.jpg')
zhang = Image.open('demo/zhang1.jpg')

display(cao, ksiwek, zhang)
In [9]:
def mix_images(images, coef):
    return np.dot(np.array(images).T, coef).T

def show_images(images, adjust=False, figsize=(15, 9), shape=None, imshow_params=dict()):
    images = np.array(images)
    if shape is None:
        N = int(np.ceil(np.sqrt(images.shape[0])))
        M = int(np.floor(images.shape[0] / N))
    else:
        N = shape[0]
        M = shape[1]
    
    if adjust:
        figsize=(figsize[0], figsize[0] * N / M)
    
    fig, axs = plt.subplots(N, M, figsize=figsize)
    axs = axs.flatten()

    for im, ax in zip(images, axs):
        ax.imshow(im, **imshow_params)
        ax.axis('off')

    plt.subplots_adjust(wspace=0.0, hspace=0.0)
    plt.tight_layout(w_pad=0.0, h_pad=0.0)
    plt.show()
In [10]:
guys = [
    np.asarray(Image.open('demo/cao1.jpg')),
    np.asarray(Image.open('demo/ksiwek1.jpg')),
    np.asarray(Image.open('demo/zhang1.jpg'))
]

images = []
for _ in range(4):
    images = images + guys

    alphas = np.random.uniform(size=3)
    alphas = alphas / alphas.sum()

    images.append(mix_images(guys, alphas))

show_images(images, adjust=True, figsize=(12, 12), shape=(4, 4), imshow_params={'cmap':'gray', 'vmin':0.0, 'vmax':255.0})

Датасеты

Cifar

(картинки не очень детализированные, поэтому из эстетических соображений с ним работать не будем)

In [ ]:
!wget "http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz" -O cifar-10-python.tar.gz
--2021-12-02 19:41:46--  http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
Resolving www.cs.toronto.edu (www.cs.toronto.edu)... 128.100.3.30
Connecting to www.cs.toronto.edu (www.cs.toronto.edu)|128.100.3.30|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 170498071 (163M) [application/x-gzip]
Saving to: ‘cifar-10-python.tar.gz’

cifar-10-python.tar 100%[===================>] 162.60M  48.2MB/s    in 3.5s    

2021-12-02 19:41:50 (46.9 MB/s) - ‘cifar-10-python.tar.gz’ saved [170498071/170498071]

In [ ]:
%mv cifar-10-python.tar.gz cifar.tar.gz
%ls
cifar-10-batches-py/  demo/        Linnaeus.rar       mipt-opt-kr2.ipynb
cifar.tar.gz          linna_data/  mipt-opt-kr2.html  Sk_NMF.pptx
In [ ]:
!tar -xvzf "cifar.tar.gz"
cifar-10-batches-py/
cifar-10-batches-py/data_batch_4
cifar-10-batches-py/readme.html
cifar-10-batches-py/test_batch
cifar-10-batches-py/data_batch_3
cifar-10-batches-py/batches.meta
cifar-10-batches-py/data_batch_2
cifar-10-batches-py/data_batch_5
cifar-10-batches-py/data_batch_1
In [ ]:
def unpickle(file):
    import pickle
    with open(file, 'rb') as fo:
        dict = pickle.load(fo, encoding='bytes')
    return dict
In [ ]:
cifar_images = unpickle('cifar-10-batches-py/data_batch_1')[b'data']
cifar_images
Out[ ]:
array([[ 59,  43,  50, ..., 140,  84,  72],
       [154, 126, 105, ..., 139, 142, 144],
       [255, 253, 253, ...,  83,  83,  84],
       ...,
       [ 71,  60,  74, ...,  68,  69,  68],
       [250, 254, 211, ..., 215, 255, 254],
       [ 62,  61,  60, ..., 130, 130, 131]], dtype=uint8)
In [ ]:
cifar_images = cifar_images.reshape((10000, 3, 32, 32))
cifar_images.shape
Out[ ]:
(10000, 3, 32, 32)
In [ ]:
batch = np.swapaxes(np.swapaxes(cifar_images[0:4],1,2), 2, 3)
show_images(batch, adjust=True, figsize=(6, 6), shape=(2, 2))
In [ ]:

Linnaeus

In [ ]:
!wget "http://chaladze.com/l5/img/Linnaeus 5 128X128.rar" -O 'Linnaeus 5 128X128.rar'
--2021-12-03 19:40:09--  http://chaladze.com/l5/img/Linnaeus%205%20128X128.rar
Resolving chaladze.com (chaladze.com)... 74.208.236.157, 2607:f1c0:100f:f000::25c
Connecting to chaladze.com (chaladze.com)|74.208.236.157|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 138983573 (133M) [application/rar]
Saving to: ‘Linnaeus 5 128X128.rar’

Linnaeus 5 128X128. 100%[===================>] 132.54M  43.7MB/s    in 3.3s    

2021-12-03 19:40:12 (39.8 MB/s) - ‘Linnaeus 5 128X128.rar’ saved [138983573/138983573]

In [ ]:
%mv 'Linnaeus 5 128X128.rar' Linnaeus.rar
%ls
cifar-10-batches-py/  linna_data/       mipt-opt-kr2.html
cifar.tar.gz          linna_data_test/  mipt-opt-kr2.ipynb
demo/                 Linnaeus.rar      Sk_NMF.pptx
In [ ]:
%ls
cifar-10-batches-py/  demo/        Linnaeus.rar       mipt-opt-kr2.ipynb
cifar.tar.gz          linna_data/  mipt-opt-kr2.html  Sk_NMF.pptx
In [ ]:
!mkdir linna_data
!unrar x Linnaeus.rar ./linna_data
In [11]:
path = 'linna_data'
linna_image_paths = []

for root, dirs, files in os.walk(path):
	for file in files:
		linna_image_paths.append(os.path.join(root,file))
In [12]:
len(linna_image_paths)
Out[12]:
8000
In [13]:
N = 3
M = 4

images = []
for _ in range(M):
    new_images = [np.asarray(Image.open(path)) / 255 for path in np.random.choice(linna_image_paths, N, replace=False)]
    images = images + new_images

    alphas = np.random.uniform(size=N)
    alphas = alphas / alphas.sum()

    images.append(mix_images(new_images, alphas))

show_images(images, adjust=True, figsize=(12, 12), shape=(M, N + 1))

Небольшая тренировка и вводные по cvxpy

Функция, которая понадобится в алгоритме

In [14]:
def solve_LP(r, C, d, ProblemType=cvx.Minimize, solve_params=dict()):
    L = C.shape[1]
    alpha = cvx.Variable(L)
    constraints = [ C @ alpha + d >= 0 ]
    obj = ProblemType(r @ (C @ alpha + d))
    prob = cvx.Problem(obj, constraints=constraints)
    prob.solve(**solve_params)

    return prob.value, alpha.value
In [15]:
C = np.array([
    [1, 0],
    [0, 1],
    [-1, -1]
])

d = np.array([-2, -2, 8])
params = {'verbose':False}

r = np.array([0, 0, 1])
p, alpha = solve_LP(r, C, d, solve_params=params)
p, alpha
Out[15]:
(6.745136893471226e-09, array([4., 4.]))
In [16]:
r = np.array([1, 0, 0])
p, alpha = solve_LP(r, C, d, solve_params=params)
p, alpha
Out[16]:
(6.7448908680489694e-09, array([2.00000001, 3.9999939 ]))
In [17]:
r = np.array([0, 1, 0])
p, alpha = solve_LP(r, C, d, solve_params=params)
p, alpha
Out[17]:
(6.744891312138179e-09, array([3.9999939 , 2.00000001]))
In [18]:
r = np.array([1, 1, 0])
p, alpha = solve_LP(r, C, d, ProblemType=cvx.Maximize, solve_params=params)
p, alpha
Out[18]:
(3.999999993254863, array([4., 4.]))
In [19]:
r = np.array([1, 1, 1])
solve_LP(r, C, d, ProblemType=cvx.Maximize, solve_params=params), solve_LP(r, C, d, ProblemType=cvx.Minimize, solve_params=params)  # направление перпендикулярно плоскости
Out[19]:
((4.0, array([3.33333325, 3.33333325])),
 (4.0, array([3.33333325, 3.33333325])))

Алгоритм

In [20]:
def svd_solve(svd_solver, A, k=None, vectors=True, **kwargs):
    import scipy.linalg as sla
    import numpy.linalg as la
    import scipy.sparse.linalg as spla

    if svd_solver == 'numpy':
        if vectors != False:
            U, S, Vh = la.svd(A, compute_uv=True, **kwargs)
        else:
            S = la.svd(A, compute_uv=False, **kwargs)
    elif svd_solver == 'scipy':
        if vectors != False:
            U, S, Vh = sla.svd(A, compute_uv=True, **kwargs)
        else:
            S = sla.svd(A, compute_uv=False, **kwargs)
    elif svd_solver == 'scipy.sparse':
        if k is None:
            raise ValueError("Incorrect k for scipy.sparse.svds")
        U, S, Vh = spla.svds(A, k=k, return_singular_vectors=vectors, **kwargs)
    else:
        raise ValueError("Incorrect solver")

    if vectors == 'u':
        return U, S
    elif vectors == 'vh':
        return S, Vh
    elif vectors == True:
        return U, S, Vh
    return S
In [21]:
def solve_LP(r, C, d, ProblemType=cvx.Minimize, solve_params=dict(), verbose=True):
    if verbose:
        print(f'Searching LP solution')

    L = C.shape[1]
    alpha = cvx.Variable(L)
    constraints = [ C @ alpha + d >= 0 ]
    obj = ProblemType(r @ (C @ alpha + d))
    prob = cvx.Problem(obj, constraints=constraints)
    prob.solve(**solve_params)

    if verbose:
        print(f'Found LP solution: {alpha.value}, value = {prob.value}')

    return prob.value, alpha.value
In [22]:
def is_exp_pt(C, d, alpha, tol=1e-6, verbose=True):
    '''
    Verify if alpha is an extreme point of the polyhedral set
    { alpha | C@alpha + d >= 0 }

    :param (C, d) - the set of polyhedron parameter
    :param alpha - the point to be tested
    :param tol - specifies the numerical tolerance
    :return Is alpha an extreme point
    :rtype bool
    '''
    if verbose:
        print(f'Checking if extreme point: {alpha}')

    T = C[(np.abs(C@alpha + d) < tol), :]

    s = svd_solve('numpy', T, vectors=False)
    s = np.abs(s)
    temp = s / s.sum()
    return np.sum(temp > tol) == C.shape[1]
In [23]:
def get_affine_set(X, N, tol=1e-6, svd_solver='numpy', verbose=True):
    '''
    Generate an affine set (C, d) from matrix
    of observations x
    '''
    if verbose:
        print(f'Generating (C, d) affine set')
    index = np.abs(X).sum(axis=1) > tol
    Xn = X[index, :]

    d = Xn.mean(axis=1)
    params = {'which':'LM'} if svd_solver == 'scipy.sparse' else {'full_matrices':True}
    C, _ = svd_solve(svd_solver, Xn - d[:, np.newaxis], k=N-1, vectors='u', **params)
    C = C[:, :N - 1]

    if verbose:
        print(f'(C, d) affine set found')
    return C, d, index
In [24]:
@MockingPrint(MockPrint())
def CAMNS_LP(X, N, lp_tol=1e-3, ext_tol=1e-6, zeros_tol=1e-6, max_iter=None, svd_solver='numpy', verbose=True):
    '''
    A practical implementation of the CAMNS-LP method

    :param X - the (L, M) observation array, where M is the number of
           observations.
    :param N - the number of sources.
    :param lp_tol - tolerance for numerical errors of LP
    :param ext_tol - tolerance for extreme-point validation
    :param zeros_tol - tolerance for eliminating zero observation points
    :return the L-by-N extracted source matrix, where L is the data length
    '''
    # ------------Step 0
    # -- Get affine set C, d from matrix of observations
    L, M = X.shape
    C, d, index = get_affine_set(X, N, tol=zeros_tol, svd_solver=svd_solver, verbose=verbose)
    LL = index.shape[0]

    # --------LP Extreme-Point Finding Algorithm [Table 1]---------------
    # ------------Step 1
    S = np.empty(shape=(LL, 0))
    lp_cnt = 0
    iter = 0
    
    # B = np.eye(LL)
    Q = np.zeros(shape=(LL, LL))

    # --------------Step 7 (repeat until l == N)
    while S.shape[1] < N and (max_iter is None or iter < max_iter): 
        iter += 1
        if verbose:
            print(f'Starting iter {iter}')
        # --------------Step 2
        w = sps.norm.rvs(size=LL)
        
        # aka r = B @ w = (I_LL - QQ^T)w
        r = w - Q @ (Q.T @ w)

        # --------------Step 3
        p_star, alpha_1 = solve_LP(r, C, d, ProblemType=cvx.Minimize, verbose=verbose)
        q_star, alpha_2 = solve_LP(r, C, d, ProblemType=cvx.Maximize, verbose=verbose)

        c_1 = C @ alpha_1 + d
        c_2 = C @ alpha_2 + d
        
        # --------------Step 4-5
        if (S.shape[1] == 0 or np.abs(p_star) / (la.norm(r) * la.norm(c_1))
            >= lp_tol) and is_exp_pt(C, d, alpha_1, tol=ext_tol, verbose=verbose):
            S = np.append(S, c_1[:, np.newaxis], axis=1)

        if (S.shape[1] == 0 or np.abs(q_star) / (la.norm(r) * la.norm(c_2))
            >= lp_tol) and is_exp_pt(C, d, alpha_2, tol=ext_tol, verbose=verbose):
            S = np.append(S, c_2[:, np.newaxis], axis=1)
        
        # --------------Step 6
        Q, _ = la.qr(S)
        if verbose:
            print(f'{S.shape[1]} extreme points found')
    
    if S.shape[1] > N:  # actually l = S.shape[1] = N + 1
        S = S[:, :N] if p_star > q_star else S[:, np.arange(N + 1) != N]

    ans = np.zeros(shape=(L, N))
    ans[index, :] = S
    return ans

Работа алгоритма

Посмотрим на небольшой пример:

  • N - число реальных векторов
  • M - число наблюдений
  • L - количество данных (т.е. пиксели х каналы)
In [25]:
def find_closests(real, result):
    tree = KDTree(result.T)
    return tree.query(real.T)
In [26]:
def experiment(N, M, L, verbose=True):
    real = np.random.uniform(size=(L, N))
    A = np.random.uniform(size=(N, M))
    A = A / A.sum(axis=0)
    observ = real @ A

    if verbose:
        print("--------------Algorithm output--------------")
    result = CAMNS_LP(observ, N, svd_solver='scipy.sparse', verbose=verbose)
    if verbose:
        print("-------------------End----------------------\n\n")


    print('--------Maxs and mins of result vectors:-----')
    for j in result.T:
        print("({:0.3f}, {:0.3f})".format(np.min(j), np.max(j)), end=" ")
    print('\n--------------------------------------------\n\n')

    ordinal = lambda n: "%d%s" % (n,"tsnrhtdd"[(n//10%10!=1)*(n%10<4)*n%10::4])
    tree = KDTree(result.T)

    dists, inds = find_closests(real, result)

    for i, (dist, ind) in enumerate(zip(dists, inds)):
        print(f"For {ordinal(i)} real vector the closest result is {ordinal(ind)}, distance {dist:.03f}")
In [27]:
experiment(3, 10, 128*128)
--------------Algorithm output--------------
[ 0:00:00.000016] Generating (C, d) affine set
[ 0:00:00.019522] (C, d) affine set found
[ 0:00:01.152984] Starting iter 1
[ 0:00:01.484954] Searching LP solution
[ 0:00:01.558094] Found LP solution: [ 18.55157603 -24.49952457], value = 16.01521018342889
[ 0:00:01.558334] Searching LP solution
[ 0:00:01.627490] Found LP solution: [-26.33306698   3.15914012], value = 48.382797657169604
[ 0:00:01.629065] Checking if extreme point: [ 18.55157603 -24.49952457]
[ 0:00:01.634507] Checking if extreme point: [-26.33306698   3.15914012]
[ 0:00:01.731667] 2 extreme points found
[ 0:00:01.732185] Starting iter 2
[ 0:00:01.737176] Searching LP solution
[ 0:00:01.818279] Found LP solution: [19.77194563 27.69888794], value = -9.510829162341292
[ 0:00:01.818497] Searching LP solution
[ 0:00:01.885105] Found LP solution: [-22.91325015   0.59546619], value = 0.08196416026026832
[ 0:00:01.886725] Checking if extreme point: [19.77194563 27.69888794]
[ 0:00:01.889163] 3 extreme points found
-------------------End----------------------


--------Maxs and mins of result vectors:-----
(-0.000, 1.000) (0.000, 1.000) (-0.000, 1.000) 
--------------------------------------------


For 0th real vector the closest result is 0th, distance 0.004
For 1st real vector the closest result is 2nd, distance 0.011
For 2nd real vector the closest result is 1st, distance 0.212
In [28]:
experiment(3, 10, 1000)
--------------Algorithm output--------------
[ 0:00:00.000014] Generating (C, d) affine set
[ 0:00:00.006632] (C, d) affine set found
[ 0:00:00.008662] Starting iter 1
[ 0:00:00.010367] Searching LP solution
[ 0:00:00.020549] Found LP solution: [-0.17116329  8.04897936], value = -2.2113485675939266
[ 0:00:00.020683] Searching LP solution
[ 0:00:00.031385] Found LP solution: [ 5.12394668 -3.8149092 ], value = 19.23182816634823
[ 0:00:00.032509] Checking if extreme point: [-0.17116329  8.04897936]
[ 0:00:00.033519] Checking if extreme point: [ 5.12394668 -3.8149092 ]
[ 0:00:00.050763] 2 extreme points found
[ 0:00:00.051669] Starting iter 2
[ 0:00:00.053173] Searching LP solution
[ 0:00:00.062670] Found LP solution: [-7.35627762 -3.06525973], value = -10.67525230029494
[ 0:00:00.062808] Searching LP solution
[ 0:00:00.073127] Found LP solution: [ 5.49923127 -2.22272685], value = 0.9544465065939725
[ 0:00:00.074529] Checking if extreme point: [-7.35627762 -3.06525973]
[ 0:00:00.075795] Checking if extreme point: [ 5.49923127 -2.22272685]
[ 0:00:00.077032] 4 extreme points found
-------------------End----------------------


--------Maxs and mins of result vectors:-----
(0.000, 1.001) (0.000, 1.002) (0.000, 1.009) 
--------------------------------------------


For 0th real vector the closest result is 2nd, distance 0.260
For 1st real vector the closest result is 1st, distance 0.382
For 2nd real vector the closest result is 0th, distance 0.020
In [29]:
experiment(3, 10, 100)
--------------Algorithm output--------------
[ 0:00:00.000019] Generating (C, d) affine set
[ 0:00:00.001663] (C, d) affine set found
[ 0:00:00.001761] Starting iter 1
[ 0:00:00.002385] Searching LP solution
[ 0:00:00.013676] Found LP solution: [ 1.81178069 -2.33680645], value = -4.909197940395788
[ 0:00:00.014389] Searching LP solution
[ 0:00:00.020865] Found LP solution: [0.04630812 2.29203016], value = -1.7350483826132808
[ 0:00:00.021806] Checking if extreme point: [ 1.81178069 -2.33680645]
[ 0:00:00.022861] Checking if extreme point: [0.04630812 2.29203016]
[ 0:00:00.023817] 2 extreme points found
[ 0:00:00.024460] Starting iter 2
[ 0:00:00.025116] Searching LP solution
[ 0:00:00.030971] Found LP solution: [1.82985842 1.45468783], value = -0.3057765342102952
[ 0:00:00.031639] Searching LP solution
[ 0:00:00.037732] Found LP solution: [-2.55449517 -1.17844656], value = 0.8195775452356953
[ 0:00:00.038790] Checking if extreme point: [1.82985842 1.45468783]
[ 0:00:00.039940] Checking if extreme point: [-2.55449517 -1.17844656]
[ 0:00:00.040971] 4 extreme points found
-------------------End----------------------


--------Maxs and mins of result vectors:-----
(0.000, 1.048) (0.000, 1.006) (0.000, 1.161) 
--------------------------------------------


For 0th real vector the closest result is 1st, distance 3.813
For 1st real vector the closest result is 0th, distance 0.783
For 2nd real vector the closest result is 1st, distance 0.096

В статье упоминается, что алгоритм хорошо работает при $L \gg max(M, N)$

На практике видим, как с уменьшением размера данных точность алгоритма ухудшается.

Поиграемся с учеными

In [74]:
guys = np.array(guys)

def mix_scientists(M=3, figsize=(12, 12), svd_solver='numpy', show=True, verbose=True):
    images = guys
    images = np.append(images, np.ones(shape=(M - 3, 128, 128)) * 255, axis=0)

    alphas = np.random.uniform(size=(M, 3))
    alphas = alphas / alphas.sum(axis=1)[:, np.newaxis]

    mixes = np.tensordot(alphas, guys, axes=(1, 0))

    print("--------------Algorithm output--------------")
    result = CAMNS_LP(mixes.reshape(M, 128*128).T, 3, svd_solver=svd_solver, verbose=verbose).T.reshape(3, 128, 128)
    print("-------------------End----------------------\n\n")

    dists, inds = find_closests(guys.reshape(3, 128*128).T, result.reshape(3, 128*128).T)

    images = np.append(images, mixes, axis=0)
    images = np.append(images, result, axis=0)
    images = np.append(images, np.ones(shape=(M - 3, 128, 128)) * 255, axis=0)
    images = np.append(images, result[inds, :, :], axis=0)
    images = np.append(images, np.ones(shape=(M - 3, 128, 128)) * 255, axis=0)

    if show:
        show_images(images, adjust=True, figsize=figsize, shape=(4, M), imshow_params={'cmap':'gray', 'vmin':0.0, 'vmax':255.0})
In [37]:
mix_scientists(M=3, figsize=(9, 9), svd_solver='numpy', show=False)
mix_scientists(M=3, figsize=(9, 9), svd_solver='scipy', show=False)
mix_scientists(M=3, figsize=(9, 9), svd_solver='scipy.sparse', show=True)
--------------Algorithm output--------------
[ 0:00:00.000013] Generating (C, d) affine set
[ 0:00:05.587574] (C, d) affine set found
[ 0:00:06.060606] Starting iter 1
[ 0:00:06.393042] Searching LP solution
[ 0:00:06.485656] Found LP solution: [  -21.57298945 -3259.60454656], value = 9168.559423669758
[ 0:00:06.485873] Searching LP solution
[ 0:00:06.556799] Found LP solution: [-5468.2718732   4005.12421271], value = 18006.54905693856
[ 0:00:06.558520] Checking if extreme point: [  -21.57298945 -3259.60454656]
[ 0:00:06.560164] Checking if extreme point: [-5468.2718732   4005.12421271]
[ 0:00:06.578002] 2 extreme points found
[ 0:00:06.578120] Starting iter 2
[ 0:00:06.579670] Searching LP solution
[ 0:00:06.889375] Found LP solution: [3173.5659704   565.35243316], value = -154.1750724596456
[ 0:00:06.890202] Searching LP solution
[ 0:00:07.179520] Found LP solution: [-5680.32070279  2977.7735755 ], value = 24.979243594330697
[ 0:00:07.181802] 2 extreme points found
[ 0:00:07.182687] Starting iter 3
[ 0:00:07.184661] Searching LP solution
[ 0:00:07.494901] Found LP solution: [-5680.32070151  2977.77357379], value = -26.10669133897632
[ 0:00:07.495098] Searching LP solution
[ 0:00:07.778923] Found LP solution: [3173.5659704   565.35243317], value = 161.1338235134521
[ 0:00:07.781367] 2 extreme points found
[ 0:00:07.782073] Starting iter 4
[ 0:00:07.783652] Searching LP solution
[ 0:00:07.879539] Found LP solution: [3173.56597037  565.35243365], value = -5223.106247559679
[ 0:00:07.880521] Searching LP solution
[ 0:00:08.174905] Found LP solution: [-5680.32070684  2977.77358092], value = 846.2408428797321
[ 0:00:08.182421] Checking if extreme point: [3173.56597037  565.35243365]
[ 0:00:08.185482] 3 extreme points found
-------------------End----------------------


--------------Algorithm output--------------
[ 0:00:00.000016] Generating (C, d) affine set
[ 0:00:01.647072] (C, d) affine set found
[ 0:00:02.113634] Starting iter 1
[ 0:00:02.448081] Searching LP solution
[ 0:00:02.522751] Found LP solution: [3139.00526404 1842.27921253], value = 9832.951794287574
[ 0:00:02.522949] Searching LP solution
[ 0:00:02.600972] Found LP solution: [-6044.17599218  -106.95592831], value = 14479.481900710663
[ 0:00:02.602559] Checking if extreme point: [3139.00526404 1842.27921253]
[ 0:00:02.604150] Checking if extreme point: [-6044.17599218  -106.95592831]
[ 0:00:02.622583] 2 extreme points found
[ 0:00:02.622776] Starting iter 2
[ 0:00:02.624503] Searching LP solution
[ 0:00:02.715667] Found LP solution: [-5198.58058259   654.43505902], value = -162.33138430113877
[ 0:00:02.716176] Searching LP solution
[ 0:00:02.794009] Found LP solution: [ 3156.13705784 -3141.58629266], value = 1391.34377515378
[ 0:00:02.795903] 2 extreme points found
[ 0:00:02.796337] Starting iter 3
[ 0:00:02.797729] Searching LP solution
[ 0:00:02.882892] Found LP solution: [-5198.58058259   654.43505902], value = -544.1731393836408
[ 0:00:02.883098] Searching LP solution
[ 0:00:02.963346] Found LP solution: [ 3156.13705803 -3141.58629262], value = 4664.112939997514
[ 0:00:02.978992] Checking if extreme point: [ 3156.13705803 -3141.58629262]
[ 0:00:02.981389] 3 extreme points found
-------------------End----------------------


--------------Algorithm output--------------
[ 0:00:00.000012] Generating (C, d) affine set
[ 0:00:00.004135] (C, d) affine set found
[ 0:00:00.305305] Starting iter 1
[ 0:00:00.637901] Searching LP solution
[ 0:00:00.712148] Found LP solution: [-1435.85737132 -5693.16845539], value = 12734.221982598456
[ 0:00:00.712368] Searching LP solution
[ 0:00:00.773226] Found LP solution: [1894.08096147 3084.18002344], value = 29022.153926062474
[ 0:00:00.774799] Checking if extreme point: [-1435.85737132 -5693.16845539]
[ 0:00:00.776457] Checking if extreme point: [1894.08096147 3084.18002344]
[ 0:00:00.794415] 2 extreme points found
[ 0:00:00.794523] Starting iter 2
[ 0:00:00.796147] Searching LP solution
[ 0:00:01.110906] Found LP solution: [ -554.16731819 -4973.88375654], value = -88.63029760058475
[ 0:00:01.111032] Searching LP solution
[ 0:00:01.386973] Found LP solution: [-3028.60621278  3862.87114168], value = 759.6510890124622
[ 0:00:01.395299] 2 extreme points found
[ 0:00:01.395958] Starting iter 3
[ 0:00:01.398520] Searching LP solution
[ 0:00:01.698222] Found LP solution: [-3028.60621244  3862.87114048], value = -1230.8654657075394
[ 0:00:01.699337] Searching LP solution
[ 0:00:01.978889] Found LP solution: [ -554.16731769 -4973.88375541], value = 143.60799864515843
[ 0:00:01.990651] 2 extreme points found
[ 0:00:01.991396] Starting iter 4
[ 0:00:01.993181] Searching LP solution
[ 0:00:02.060184] Found LP solution: [ -554.16731735 -4973.88375459], value = -361.4722512745298
[ 0:00:02.061040] Searching LP solution
[ 0:00:02.135679] Found LP solution: [-3028.60621264  3862.87114237], value = 3098.181961719251
[ 0:00:02.137248] Checking if extreme point: [-3028.60621264  3862.87114237]
[ 0:00:02.139379] 3 extreme points found
-------------------End----------------------


In [38]:
mix_scientists(M=4, figsize=(9, 9), svd_solver='numpy', show=False)
mix_scientists(M=4, figsize=(9, 9), svd_solver='scipy', show=False)
mix_scientists(M=4, figsize=(9, 9), svd_solver='scipy.sparse', show=True)
--------------Algorithm output--------------
[ 0:00:00.000012] Generating (C, d) affine set
[ 0:00:05.967628] (C, d) affine set found
[ 0:00:06.439459] Starting iter 1
[ 0:00:06.781520] Searching LP solution
[ 0:00:06.871987] Found LP solution: [-5117.54102193   255.72364033], value = 21234.25330920436
[ 0:00:06.872863] Searching LP solution
[ 0:00:06.950842] Found LP solution: [ 4437.57801732 -2329.2058779 ], value = 32025.10856221197
[ 0:00:06.953661] Checking if extreme point: [-5117.54102193   255.72364033]
[ 0:00:06.955241] Checking if extreme point: [ 4437.57801732 -2329.2058779 ]
[ 0:00:06.973434] 2 extreme points found
[ 0:00:06.973542] Starting iter 2
[ 0:00:06.975057] Searching LP solution
[ 0:00:07.284328] Found LP solution: [-5336.73462569  -649.93917057], value = -539.1927391783166
[ 0:00:07.285650] Searching LP solution
[ 0:00:07.568060] Found LP solution: [3252.65022734 2293.74564425], value = 2404.05623437209
[ 0:00:07.575766] 2 extreme points found
[ 0:00:07.575902] Starting iter 3
[ 0:00:07.577707] Searching LP solution
[ 0:00:07.653142] Found LP solution: [3252.65022832 2293.74564463], value = -1518.4743185879606
[ 0:00:07.653338] Searching LP solution
[ 0:00:07.737904] Found LP solution: [-5336.73464016  -649.93916772], value = 340.5703728070104
[ 0:00:07.740039] 2 extreme points found
[ 0:00:07.740207] Starting iter 4
[ 0:00:07.741892] Searching LP solution
[ 0:00:08.062164] Found LP solution: [-5336.73462952  -649.93916957], value = -372.96013398755014
[ 0:00:08.062401] Searching LP solution
[ 0:00:08.124639] Found LP solution: [3252.65022829 2293.74564461], value = 1662.887999466714
[ 0:00:08.126961] 2 extreme points found
[ 0:00:08.127088] Starting iter 5
[ 0:00:08.128668] Searching LP solution
[ 0:00:08.208659] Found LP solution: [3252.65022835 2293.74564465], value = -892.1817568379299
[ 0:00:08.208892] Searching LP solution
[ 0:00:08.506745] Found LP solution: [-5336.73462229  -649.93917124], value = 200.1026091786857
[ 0:00:08.509054] 2 extreme points found
[ 0:00:08.509147] Starting iter 6
[ 0:00:08.511257] Searching LP solution
[ 0:00:08.582864] Found LP solution: [3252.65022834 2293.74564465], value = -947.9143351627974
[ 0:00:08.583080] Searching LP solution
[ 0:00:08.670715] Found LP solution: [-5336.73464016  -649.93916772], value = 212.6025673016954
[ 0:00:08.674445] 2 extreme points found
[ 0:00:08.674603] Starting iter 7
[ 0:00:08.676653] Searching LP solution
[ 0:00:08.759024] Found LP solution: [3252.65022829 2293.74564458], value = -4181.253847670507
[ 0:00:08.760665] Searching LP solution
[ 0:00:08.835781] Found LP solution: [-5336.73463987  -649.93916779], value = 937.790757651472
[ 0:00:08.838932] Checking if extreme point: [3252.65022829 2293.74564458]
[ 0:00:08.841222] 3 extreme points found
-------------------End----------------------


--------------Algorithm output--------------
[ 0:00:00.000013] Generating (C, d) affine set
[ 0:00:01.940960] (C, d) affine set found
[ 0:00:02.425912] Starting iter 1
[ 0:00:02.759972] Searching LP solution
[ 0:00:02.846844] Found LP solution: [-4525.91524808 -3479.39920511], value = -4190.105428335162
[ 0:00:02.847065] Searching LP solution
[ 0:00:02.914071] Found LP solution: [4878.36312234  701.82320501], value = 8033.168041441488
[ 0:00:02.915417] Checking if extreme point: [-4525.91524808 -3479.39920511]
[ 0:00:02.916976] Checking if extreme point: [4878.36312234  701.82320501]
[ 0:00:02.935170] 2 extreme points found
[ 0:00:02.935279] Starting iter 2
[ 0:00:02.936920] Searching LP solution
[ 0:00:03.016112] Found LP solution: [-3662.16103242 -3855.3729547 ], value = -570.6741553535794
[ 0:00:03.017761] Searching LP solution
[ 0:00:03.088515] Found LP solution: [-4502.64457424  1058.23618388], value = 3399.4547721560552
[ 0:00:03.089788] Checking if extreme point: [-4502.64457424  1058.23618388]
[ 0:00:03.092167] 3 extreme points found
-------------------End----------------------


--------------Algorithm output--------------
[ 0:00:00.000017] Generating (C, d) affine set
[ 0:00:00.003688] (C, d) affine set found
[ 0:00:00.312917] Starting iter 1
[ 0:00:00.640845] Searching LP solution
[ 0:00:00.725647] Found LP solution: [2854.20018143 7013.1187815 ], value = -33371.467219667364
[ 0:00:00.726595] Searching LP solution
[ 0:00:00.800077] Found LP solution: [-3064.54053077 -1519.25475985], value = -20695.591101372826
[ 0:00:00.801160] Checking if extreme point: [2854.20018143 7013.1187815 ]
[ 0:00:00.802531] Checking if extreme point: [-3064.54053077 -1519.25475985]
[ 0:00:00.819267] 2 extreme points found
[ 0:00:00.819967] Starting iter 2
[ 0:00:00.821594] Searching LP solution
[ 0:00:00.913023] Found LP solution: [ 1650.75717628 -2255.25895572], value = -4362.67362939354
[ 0:00:00.914785] Searching LP solution
[ 0:00:00.983662] Found LP solution: [-3098.43956055  -740.30111782], value = 479.39362544536493
[ 0:00:00.984843] Checking if extreme point: [ 1650.75717628 -2255.25895572]
[ 0:00:00.986981] 3 extreme points found
-------------------End----------------------


In [39]:
mix_scientists(M=5, figsize=(9, 9), svd_solver='numpy', show=False)
mix_scientists(M=5, figsize=(9, 9), svd_solver='scipy', show=False)
mix_scientists(M=5, figsize=(9, 9), svd_solver='scipy.sparse', show=True)
--------------Algorithm output--------------
[ 0:00:00.000013] Generating (C, d) affine set
[ 0:00:06.318785] (C, d) affine set found
[ 0:00:06.791291] Starting iter 1
[ 0:00:07.123806] Searching LP solution
[ 0:00:07.202976] Found LP solution: [-6108.37033921   934.08119276], value = 12110.766872633194
[ 0:00:07.208263] Searching LP solution
[ 0:00:07.278338] Found LP solution: [ 2576.59137148 -2629.87387043], value = 16460.32220384106
[ 0:00:07.279889] Checking if extreme point: [-6108.37033921   934.08119276]
[ 0:00:07.281505] Checking if extreme point: [ 2576.59137148 -2629.87387043]
[ 0:00:07.300181] 2 extreme points found
[ 0:00:07.300900] Starting iter 2
[ 0:00:07.302538] Searching LP solution
[ 0:00:07.384645] Found LP solution: [3486.91944613 2270.17834046], value = -7700.47446949663
[ 0:00:07.387404] Searching LP solution
[ 0:00:07.457146] Found LP solution: [-5412.97119915    33.43261115], value = 898.4326545734493
[ 0:00:07.460666] Checking if extreme point: [3486.91944613 2270.17834046]
[ 0:00:07.463047] 3 extreme points found
-------------------End----------------------


--------------Algorithm output--------------
[ 0:00:00.000020] Generating (C, d) affine set
[ 0:00:02.303364] (C, d) affine set found
[ 0:00:02.780092] Starting iter 1
[ 0:00:03.119228] Searching LP solution
[ 0:00:03.207494] Found LP solution: [-6695.67920764 -1698.12449108], value = -4012.1036510273516
[ 0:00:03.208399] Searching LP solution
[ 0:00:03.287485] Found LP solution: [ 3687.12952793 -1871.82721832], value = 15442.282422294873
[ 0:00:03.289009] Checking if extreme point: [-6695.67920764 -1698.12449108]
[ 0:00:03.292499] Checking if extreme point: [ 3687.12952793 -1871.82721832]
[ 0:00:03.310830] 2 extreme points found
[ 0:00:03.311554] Starting iter 2
[ 0:00:03.313176] Searching LP solution
[ 0:00:03.409776] Found LP solution: [ 3058.60727211 -2333.21371886], value = -1172.6353654677669
[ 0:00:03.410863] Searching LP solution
[ 0:00:03.482687] Found LP solution: [1676.40642098 2456.30485357], value = 10671.450589207234
[ 0:00:03.483964] Checking if extreme point: [1676.40642098 2456.30485357]
[ 0:00:03.486048] 3 extreme points found
-------------------End----------------------


--------------Algorithm output--------------
[ 0:00:00.000019] Generating (C, d) affine set
[ 0:00:00.004283] (C, d) affine set found
[ 0:00:00.313437] Starting iter 1
[ 0:00:00.645111] Searching LP solution
[ 0:00:00.729250] Found LP solution: [-8023.21845292  2653.09882813], value = -40274.975762776725
[ 0:00:00.729471] Searching LP solution
[ 0:00:00.820611] Found LP solution: [1363.36522789 2503.49126182], value = -29150.182584242993
[ 0:00:00.822113] Checking if extreme point: [-8023.21845292  2653.09882813]
[ 0:00:00.823487] Checking if extreme point: [1363.36522789 2503.49126182]
[ 0:00:00.840650] 2 extreme points found
[ 0:00:00.841344] Starting iter 2
[ 0:00:00.842933] Searching LP solution
[ 0:00:00.929086] Found LP solution: [-7029.01371172  3206.54689625], value = -637.6530930348404
[ 0:00:00.930785] Searching LP solution
[ 0:00:01.014967] Found LP solution: [  267.67503596 -2358.47046944], value = 5465.330475382925
[ 0:00:01.016197] Checking if extreme point: [  267.67503596 -2358.47046944]
[ 0:00:01.018389] 3 extreme points found
-------------------End----------------------


В результате видим, что scipy.sparse.linalg.svds в данном случае работает сильно быстрее, будем далее пользоваться им

Поиграем теперь с рандомными (серыми) изображениями из датасета Linnaeus

In [77]:
def mix_random_linnaeus_grey(N=3, M=3, figsize=(12, 12), svd_solver='numpy', show=True, verbose=True):
    real = np.array([np.asarray(Image.open(path).convert('L')) for path in np.random.choice(linna_image_paths, N, replace=False)])
    images = np.append(real, np.ones(shape=(M - N, 128, 128))*255, axis=0)

    alphas = np.random.uniform(size=(M, N))
    alphas = alphas / alphas.sum(axis=1)[:, np.newaxis]
    mixes = np.tensordot(alphas, real, axes=(1, 0))

    if verbose:
        print("--------------Algorithm output--------------")
    result = CAMNS_LP(mixes.reshape(M, 128*128).T, N, svd_solver=svd_solver, verbose=verbose).T.reshape(N, 128, 128)
    if verbose:
        print("-------------------End----------------------\n\n")

    dists, inds = find_closests(real.reshape(N, 128*128).T, result.reshape(N, 128*128).T)

    images = np.append(images, mixes, axis=0)
    images = np.append(images, result, axis=0)
    images = np.append(images, np.ones(shape=(M - N, 128, 128))*255, axis=0)
    images = np.append(images, result[inds, :, :], axis=0)
    images = np.append(images, np.ones(shape=(M - N, 128, 128))*255, axis=0)

    if show:
        show_images(images, adjust=True, figsize=figsize, shape=(4, M), imshow_params={'cmap':'gray', 'vmin':0.0, 'vmax':255.0})
In [41]:
mix_random_linnaeus_grey(N=3, M=3, figsize=(9, 9), svd_solver='scipy.sparse')
--------------Algorithm output--------------
[ 0:00:00.000017] Generating (C, d) affine set
[ 0:00:00.003040] (C, d) affine set found
[ 0:00:00.315532] Starting iter 1
[ 0:00:00.655671] Searching LP solution
[ 0:00:00.724453] Found LP solution: [2480.66078843 5499.69391071], value = -20193.247178395686
[ 0:00:00.725369] Searching LP solution
[ 0:00:00.796003] Found LP solution: [-7608.23111775 -2864.33138512], value = -2011.9168325443043
[ 0:00:00.797068] Checking if extreme point: [2480.66078843 5499.69391071]
[ 0:00:00.813971] 1 extreme points found
[ 0:00:00.814831] Starting iter 2
[ 0:00:00.816562] Searching LP solution
[ 0:00:00.908163] Found LP solution: [1109.82303224 6364.46852936], value = -1620.7164363458633
[ 0:00:00.909256] Searching LP solution
[ 0:00:00.968166] Found LP solution: [ 1174.85589986 -3585.5507524 ], value = 14149.86851081158
[ 0:00:00.969770] Checking if extreme point: [ 1174.85589986 -3585.5507524 ]
[ 0:00:00.971380] 2 extreme points found
[ 0:00:00.971704] Starting iter 3
[ 0:00:00.973330] Searching LP solution
[ 0:00:01.044194] Found LP solution: [ 1504.11540784 -3457.93540375], value = -507.41050711598655
[ 0:00:01.044566] Searching LP solution
[ 0:00:01.116663] Found LP solution: [-8558.68915545  -625.15756049], value = 16579.318829700063
[ 0:00:01.119313] Checking if extreme point: [-8558.68915545  -625.15756049]
[ 0:00:01.121533] 3 extreme points found
-------------------End----------------------


In [42]:
mix_random_linnaeus_grey(N=4, M=5, figsize=(12, 12), svd_solver='scipy.sparse')
--------------Algorithm output--------------
[ 0:00:00.000021] Generating (C, d) affine set
[ 0:00:00.005930] (C, d) affine set found
[ 0:00:00.313882] Starting iter 1
[ 0:00:00.645666] Searching LP solution
[ 0:00:00.772449] Found LP solution: [ 2257.76480135 -6433.44382115 -7076.82236475], value = -24149.17588763996
[ 0:00:00.772670] Searching LP solution
[ 0:00:00.869013] Found LP solution: [-1342.07377799  3926.90789213   867.36513386], value = 1969.7912605417496
[ 0:00:00.872053] Checking if extreme point: [ 2257.76480135 -6433.44382115 -7076.82236475]
[ 0:00:00.873363] Checking if extreme point: [-1342.07377799  3926.90789213   867.36513386]
[ 0:00:00.877693] 2 extreme points found
[ 0:00:00.878468] Starting iter 2
[ 0:00:00.880276] Searching LP solution
[ 0:00:01.010299] Found LP solution: [ -438.47992141   500.13161159 -4855.41640388], value = -3253.815738193982
[ 0:00:01.011088] Searching LP solution
[ 0:00:01.110390] Found LP solution: [ 4078.14958897 -2215.10082236  2466.51062486], value = 8238.995405136468
[ 0:00:01.113086] Checking if extreme point: [ -438.47992141   500.13161159 -4855.41640388]
[ 0:00:01.114819] Checking if extreme point: [ 4078.14958897 -2215.10082236  2466.51062486]
[ 0:00:01.117809] 4 extreme points found
-------------------End----------------------


In [43]:
mix_random_linnaeus_grey(N=5, M=7, figsize=(12, 12), svd_solver='scipy.sparse')
--------------Algorithm output--------------
[ 0:00:00.000016] Generating (C, d) affine set
[ 0:00:00.007353] (C, d) affine set found
[ 0:00:00.336750] Starting iter 1
[ 0:00:00.672186] Searching LP solution
[ 0:00:00.815232] Found LP solution: [-3800.79686334  4214.49083366 -1809.10174799   431.97335062], value = -8839.829347275976
[ 0:00:00.815899] Searching LP solution
[ 0:00:00.934069] Found LP solution: [ 3655.69347758 -3363.71385668  -806.60688493  -657.25041752], value = 3493.9954319741128
[ 0:00:00.939481] Checking if extreme point: [-3800.79686334  4214.49083366 -1809.10174799   431.97335062]
[ 0:00:00.940894] Checking if extreme point: [ 3655.69347758 -3363.71385668  -806.60688493  -657.25041752]
[ 0:00:00.945094] 2 extreme points found
[ 0:00:00.945753] Starting iter 2
[ 0:00:00.947407] Searching LP solution
[ 0:00:01.110683] Found LP solution: [ 5671.45080803  4010.21451653  4139.31568536 -3028.30595292], value = -10246.095864940848
[ 0:00:01.111406] Searching LP solution
[ 0:00:01.226110] Found LP solution: [  978.17764256 -3140.03612879 -6764.49438071 -1451.62234611], value = 8511.45329086782
[ 0:00:01.227349] Checking if extreme point: [ 5671.45080803  4010.21451653  4139.31568536 -3028.30595292]
[ 0:00:01.228916] Checking if extreme point: [  978.17764256 -3140.03612879 -6764.49438071 -1451.62234611]
[ 0:00:01.237584] 4 extreme points found
[ 0:00:01.237673] Starting iter 3
[ 0:00:01.239332] Searching LP solution
[ 0:00:01.397469] Found LP solution: [-5137.31854652 -2209.85636706  3031.49917214 -5558.64738479], value = -4800.165090660856
[ 0:00:01.398219] Searching LP solution
[ 0:00:01.547785] Found LP solution: [ 2928.03612417 -2745.0142408   1132.59174414  8221.48683371], value = 3319.7986604077273
[ 0:00:01.550143] Checking if extreme point: [-5137.31854652 -2209.85636706  3031.49917214 -5558.64738479]
[ 0:00:01.552099] Checking if extreme point: [ 2928.03612417 -2745.0142408   1132.59174414  8221.48683371]
[ 0:00:01.555241] 6 extreme points found
-------------------End----------------------


In [79]:
mix_random_linnaeus_grey(N=10, M=10, figsize=(12, 12), svd_solver='scipy.sparse', verbose=False)

Видим, что при M равным или лишь чуть больше N, алгоритм отрабатывает не очень. Посмотрим, что будет, если увеличить значение M

In [80]:
mix_random_linnaeus_grey(N=4, M=7, figsize=(12, 20), svd_solver='scipy.sparse', verbose=False)
In [81]:
mix_random_linnaeus_grey(N=5, M=10, figsize=(15, 25), svd_solver='scipy.sparse', verbose=False)
In [85]:
mix_random_linnaeus_grey(N=8, M=16, figsize=(20, 25), svd_solver='scipy.sparse', verbose=False)

Потрогаем произвольные цветные изображения из датасета Linnaeus

PS. Почему-то ноутбук бесится (выжирает память и довольный умирает), если использовать все изображения целиком, поэтому приходится извращаться и уменьшать размер изображения для нормальной работы

Судя по всему, это происходит где-то на svd, ибо там могут получаться большие матрицы при поиска декомпозиции

In [42]:
def mix_random_linnaeus_color(N=3, M=3, figsize=(12, 12), im_shape=(96, 96), svd_solver='numpy', verbose=True, show=True):
    imc_shape = (*im_shape, 3)
    flat_shape = np.prod(imc_shape)

    real = np.array([np.asarray(Image.open(path).resize(im_shape)) / 255 for path in np.random.choice(linna_image_paths, N, replace=False)])
    images = np.append(real, np.ones(shape=(M - N, *imc_shape)), axis=0)

    alphas = np.random.uniform(size=(M, N))
    alphas = alphas / alphas.sum(axis=1)[:, np.newaxis]
    mixes = np.tensordot(alphas, real, axes=(1, 0))

    if verbose:
        print("--------------Algorithm output--------------")
    result = CAMNS_LP(mixes.reshape(M, flat_shape).T, N, svd_solver=svd_solver, verbose=verbose).T.reshape(N, *imc_shape)
    if verbose:
        print("-------------------End----------------------\n\n")
    result = np.clip(result, 0, 1)

    dists, inds = find_closests(real.reshape(N, flat_shape).T, result.reshape(N, flat_shape).T)

    images = np.append(images, mixes, axis=0)
    images = np.append(images, result, axis=0)
    images = np.append(images, np.ones(shape=(M - N, *imc_shape)), axis=0)
    images = np.append(images, result[inds, :, :], axis=0)
    images = np.append(images, np.ones(shape=(M - N, *imc_shape)), axis=0)

    if show:
        show_images(images, adjust=True, figsize=figsize, shape=(4, M))
In [32]:
mix_random_linnaeus_color(
    N=3,
    M=3,
    figsize=(9, 9),
    im_shape=(96, 96),
    svd_solver='scipy.sparse',
    verbose=False
)
In [33]:
mix_random_linnaeus_color(
    N=4,
    M=4,
    figsize=(12, 12),
    im_shape=(96, 96),
    svd_solver='scipy.sparse',
    verbose=False
)
In [34]:
mix_random_linnaeus_color(
    N=4,
    M=7,
    figsize=(15, 20),
    im_shape=(96, 96),
    svd_solver='scipy.sparse',
    verbose=False
)

А теперь сравним время работы при больших N и M

P.S. С увеличением N и M все чаще случаются ситуации с плохими множествами C, d, что возможно случается из-за частых проблем с зависимостями смесей данных

In [47]:
start = datetime.now()
mix_random_linnaeus_color(
    N=20,
    M=40,
    figsize=(15, 20),
    im_shape=(64, 64),
    svd_solver='numpy',
    verbose=False,
    show=False
)
end = datetime.now()
print(f"Finished in {end - start}")
Finished in 0:00:31.357398
In [46]:
start = datetime.now()
mix_random_linnaeus_color(
    N=20,
    M=40,
    figsize=(15, 20),
    im_shape=(64, 64),
    svd_solver='scipy',
    verbose=False,
    show=False
)
end = datetime.now()
print(f"Finished in {end - start}")
Finished in 0:00:21.090365
In [43]:
start = datetime.now()
mix_random_linnaeus_color(
    N=20,
    M=40,
    figsize=(15, 20),
    im_shape=(64, 64),
    svd_solver='scipy.sparse',
    verbose=False,
    show=False
)
end = datetime.now()
print(f"Finished in {end - start}")
Finished in 0:00:13.612484

Результаты

A1-A4.PNG

Prop1.PNG

Prop2.PNG

Алгоритм в целом очень классно работает, не используя никаких нейронок, сложных ML моделей и т.п., а только решая некоторую не очень сложную задачу оптимизации

При этом, несмотря на утверждения выше, он не всегда справляется с вытаскиванием всех исходных картинок корректно. Происходит это скорее всего потому, что

  • Может не выполняться A2 (про local dominant) так как картинки произвольные, чаще всего, везде имеют какой-то ненулевой цвет

  • Может не выполняться A4, так как смеси генерируются рандомно, как следствие - могут случаться линейно зависимые смеси и ранг матрицы наблюдений будет ниже N. От этого спасает увеличение числа смесей картинок. Однако и тогда могут случаться ситуации, когда из-за зависимостей в данных поиск максимума или минимума ломается

Сответственно про такие случаи нельзя сказать, может и должен ли алгоритм работать корректно. При этом, с увеличением числа наблюдений алгоритм начинает работать лучше и давать лучшие показатели, т.к. информации для обработки становится больше, что с больошой вероятностью помогает справиться с A4 и возможно как-то улучшает ситуацию в A2

Кроме того, с временем работы помогает разобраться использование scipy.sparse.linalg.svds вместо scipy.linalg.svd и numpy.linalg.svd, работающее несколько быстрее.


(результат 2 - получено много забавных фотоколлажей с животными в стиле "Вьетнамские флешбеки")

emo.jpg